/* Read in the 1989 census counts by age and oblast (only the counts of women are used below) and append the per-oblast sheets.
* Calculate the number of women of each age in each oblast in each year up to and including 1989, then the number of 15-44 year old
* women in each oblast in each year. */

set more off

* Load the birthplace lookup workbook (area codes for birth places, plus the
* early/late legalizer classification) and cache it as a Stata dataset,
* ordered by Birthplace.
import excel "$data/birthplace_codes.xlsx", sheet("Sheet1") cellrange(A1:E111) firstrow clear
sort Birthplace
save "$datatemp/birthplace_codes.dta", replace
* Import the 1989 census workbook with the number of women in each age group in
* each oblast. The workbook is read sheet by sheet (Sheet1..Sheet88, presumably
* one sheet per oblast -- confirm against the workbook); every sheet is saved as
* its own .dta file and the files are then stacked into one dataset.
local n_sheets 88

forvalues i = 1/`n_sheets' {
    import excel using "$data/women_age_census_1989.xlsx", clear firstrow sheet("Sheet`i'") cellrange(A5:E105)

    * Build numeric copies of Age and Women (age_num, women_num).
    * -destring, generate()- creates nothing and raises no error when the
    * source column was already imported as numeric or when it contains
    * non-numeric text. After the append below, that sheet would then carry
    * missing age_num/women_num and silently drop out of every sum. Handle the
    * numeric case explicitly and stop with an error if the numeric copy still
    * does not exist.
    local targets age_num women_num
    foreach src in Age Women {
        gettoken dst targets : targets
        capture confirm string variable `src'
        if _rc == 0 {
            destring `src', generate(`dst')
        }
        else {
            generate double `dst' = `src'
        }
        confirm numeric variable `dst'
    }

    sort Birthplace_code
    save "$datatemp/women_age_census_1989_`i'.dta", replace
}

* Stack all per-sheet files into a single dataset.
use "$datatemp/women_age_census_1989_1.dta", clear
forvalues i = 2/`n_sheets' {
    append using "$datatemp/women_age_census_1989_`i'.dta"
}
save "$datatemp/women_age_census_1989_all.dta", replace

* Re-sort the birthplace lookup by Birthplace_code and overwrite the saved copy,
* so that the using file is ordered on the merge key (the legacy merge syntax
* below expects both datasets to be sorted on the key).
use "$datatemp/birthplace_codes.dta", clear
sort Birthplace_code
save "$datatemp/birthplace_codes.dta", replace

* Attach the lookup columns to the stacked census data and keep only the rows
* whose Birthplace_code is present in both files (_merge==3); census rows
* without a lookup entry, and lookup rows without census data, are dropped.
* NOTE(review): -merge varlist using- is the old (pre-Stata 11) syntax. Switching
* to -merge m:1- would also assert that Birthplace_code is unique in the lookup
* file -- confirm that before modernising this line.
use "$datatemp/women_age_census_1989_all.dta", clear
sort Birthplace_code
merge (Birthplace_code) using "$datatemp/birthplace_codes.dta"
keep if _merge==3

* Subtract from selected regions the women counted in other rows of the same
* age group (presumably autonomous okrugs -- "ao" -- that the census also
* includes in the parent region's total; confirm against the census notes).
*
* Within each age group the rows are ordered by Birthplace and numbered (num).
* For the rows listed below, the counts of other rows in the same age group,
* addressed by their position in that ordering, are collected in ao and then
* subtracted from the row's own women_num.
*
* WARNING: the positions (e.g. 21, 18, 78) are hard-coded. They are only valid
* for the exact set of Birthplace rows that survives the merge above; adding,
* removing or renaming a birthplace shifts them.
*
* -stable- keeps the row order reproducible across runs should Birthplace ever
* tie within an age group (plain -sort- orders ties randomly).
sort age_num Birthplace, stable
by age_num: gen num = _n

by age_num: gen ao = women_num[21]+women_num[18] if num==78
by age_num: replace ao = women_num[42] if num==4
by age_num: replace ao = women_num[84] if num==19
by age_num: replace ao = women_num[11] if num==39
by age_num: replace ao = women_num[14] if num==17
by age_num: replace ao = women_num[58]+women_num[76]+women_num[13] if num==34
by age_num: replace ao = women_num[54] if num==2
by age_num: replace ao = women_num[27] if num==74
by age_num: replace ao = women_num[53] if num==33

* Rows with nothing to subtract (or whose referenced counts are missing) get 0.
replace ao = 0 if ao==.

* Net count of women, stored back under the original variable name.
replace women_num = women_num - ao

** Build the number of 15-44 year old women for every Birthplace_code and year
local census_year 1989
gen id = _n

** The census records age as of Jan 12-19 of 1989, so for most people the age
** reached during 1989 is the recorded age plus one
gen age_adj = age_num + 1

** Make one copy of every row per age 1..age_adj; a copy with a given age
** belongs to the calendar year in which that cohort had that age
expand age_adj
bysort id: gen age = _n
gen year = `census_year' - age_adj + age

** Keep only the ages of interest
keep if inrange(age, 15, 44)

** Total women aged 15-44 per Birthplace_code and year
collapse (sum) numwomen_1544 = women_num, by(Birthplace_code year)
rename year Year
sort Birthplace_code Year
save "$datatemp/numwomen_1544_89cens_official.dta", replace
